{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# First check whether data format is correct\n",
    "\n",
    "key_list = ['author', 'title', 'year', 'Conference']\n",
    "for line in open('./dataset/FilteredDBLP.txt', 'r', encoding='utf-8'):\n",
    "    line = line.strip('\\n')\n",
    "    if len(line) <= 0:\n",
    "        continue\n",
    "    if line != '#########':\n",
    "        word_list = line.split('\\t')\n",
    "        if len(word_list) != 2:\n",
    "            print('Strange line: %s' % line)\n",
    "        else:\n",
    "            if word_list[0] not in key_list:\n",
    "                print('Strange key: %s in %s' % (word_list[0], line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from dblp import DBLP, DBLPAuthorList\n",
    "dataset = DBLP('./dataset/FilteredDBLP.txt')\n",
    "author_list = DBLPAuthorList(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 找出某个会议的研究者\n",
    "# 以CVPR为例\n",
    "author_list.get_by_conference('CVPR')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 找出CVPR中已经不活跃的（也就是最后一次发论文的时间在2013年之前（包括2013年））\n",
    "\n",
    "author_list.get_before_year('CVPR', 2013)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 找出CVPR中仍在活跃的（也就是最后一次发论文的时间在2014年之后（包括2014年））\n",
    "\n",
    "author_list.get_after_year('CVPR', 2014)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xt8lOWZ//HPRRLOZxIOAhIOEUEr\niEFBPFsVbbdoq9aurdhasa1a291qsb/teW2rXXXbtXWL1RWtioq2spZqqZW1WEACIqdwiOEUCSQQ\nIIGEHK/fH/NEU03CJJnJnL7v12teM7nnnmfuO4TnO8/13DNj7o6IiKSeLrEegIiIxIYCQEQkRSkA\nRERSlAJARCRFKQBERFKUAkBEJEUpAEREUpQCQEQkRSkARERSVHqsB9CazMxMz87OjvUwREQSyurV\nq/e7e9bx+sV1AGRnZ5OXlxfrYYiIJBQz2xlOP5WARERSlAJARCRFKQBERFKUAkBEJEUpAEREUpQC\nQEQkRSkARERSlAJARCTOvLC6iGdX7Yr68ygARETizKPLtvPS2j1Rfx4FgIhIHDlUWUP+3nKmjRkU\n9edSAIiIxJG3tpfhjgJARCTVrCgso1t6FyaN7Bf151IAiIjEkRWFBzhj1AC6padF/bkUACIicaIz\n6/+gABARiRudWf8HBYCISNzozPo/KABEROJGZ9b/QQEgIhIXOrv+DwoAEZG40Nn1f1AAiIjEhc6u\n/4MCQEQkLnR2/R8UACIiMReL+j8oAEREYi4W9X9QAIiIxFws6v8QRgCYWXcze8vM3jGzjWb2w6B9\ntJmtNLNtZvasmXUN2rsFPxcE92c32dbdQfsWM7ssWpMSEUkksaj/Q3hHANXARe4+CZgMzDSzacC9\nwIPungMcBG4K+t8EHHT3ccCDQT/MbCJwHXAKMBP4tZl17mxFROJMrOr/EEYAeMiR4MeM4OLARcDC\noH0+cGVwe1bwM8H9F5uZBe0L3L3a3bcDBcCZEZmFiEiCilX9H8I8B2BmaWa2FigBlgDvAofcvS7o\nUgQMD24PB3YDBPcfBgY1bW/mMU2fa46Z5ZlZXmlpadtnJCKSQGJV/4cwA8Dd6919MjCC0Kv2Cc11\nC66thftaav/wc81z91x3z83KygpneCIiCStW9X9o4yogdz8ELAWmAf3NLD24awTQ+A3GRcBIgOD+\nfkBZ0/ZmHiMiknJiWf+H8FYBZZlZ/+B2D+DjQD7wOnB10G028FJwe1HwM8H9f3V3D9qvC1YJjQZy\ngLciNRERkUQTy/o/QPrxuzAMmB+s2OkCPOfuL5vZJmCBmf078DbwaND/UeBJMysg9Mr/OgB332hm\nzwGbgDrgVnevj+x0REQSRyzr/xBGALj7OuD0ZtoLaWYVj7sfA65pYVv3APe0fZgiIsknlvV/0DuB\nRURiItb1f1AAiIjERKzr/6AAEBGJiVjX/0EBICISE7Gu/4MCQESk08VD/R8UACIinS4e6v+gABAR\n6XTxUP8HBYCISKeLh/o/KABERDpVvNT/QQEgItKp4qX+DwoAEZFOFS/1f1AAiIh0qnip/4MCQESk\n08RT/R8UACIinSae6v+gABAR6TTxVP8HBYCISKeJp/o/KABERDpFvNX/QQEgItIp4q3+DwoAEZFO\nEW/1f1AAiIh0inir/4MCQEQk6uKx/g8KABGRqIvH+j8oAEREoi4e6/+gABARibp4rP+DAkBEJKri\ntf4PCgARkaiK1/o/hBEAZjbSzF43s3wz22hmdwTtPzCz98xsbXC5oslj7jazAjPbYmaXNWmfGbQV\nmNnc6ExJRCR+xGv9HyA9jD51wL+6+xoz6wOsNrMlwX0Puvt/NO1sZhOB64BTgBOAv5jZScHdvwIu\nAYqAVWa2yN03RWIiIiLxKF7r/xDGEYC7F7v7muB2BZAPDG/lIbOABe5e7e7bgQLgzOBS4O6F7l4D\nLAj6iogkpXiu/0MbzwGYWTZwOrAyaLrNzNaZ2WNmNiBoGw7sbvKwoqCtpXYRkaQUz/V/aEMAmFlv\n4AXgG+5eDjwMjAUmA8XA/Y1dm3m4t9L+4eeZY2Z5ZpZXWloa7vBEROJOPNf/IcwAMLMMQjv/p9z9\nRQB33+fu9e7eADxCqMQDoVf2I5s8fASwp5X2f+Du89w9191zs7Ky2jofEZG4Ec/1fwhvFZABjwL5\n7v5Ak/ZhTbpdBWwIbi8CrjOzbmY2GsgB3gJWATlmNtrMuhI6UbwoMtMQEYkv8V7/h/BWAc0AvgCs\nN7O1Qdt3gM+Z2WRCZZwdwC0A7r7RzJ4DNhFaQXSru9cDmNltwKtAGvCYu2+M4FxEROJGvNf/IYwA\ncPdlNF+/X9zKY+4B7mmmfXFrjxMRSRbxXv8HvRNYRCQqlsd5/R8UACIiEXeosobNcV7/BwWAiEjE\nrUyA+j8oAEREIm5F4YG4r/+DAkBEJOJWFJbFff0fFAAiIhGVKPV/UACIiERUotT/QQEgIhJRiVL/\nBwWAiEhEJUr9HxQAIiIRk0j1f1AAiIhETCLV/0EBICISMYlU/wcFgIhIxCRS/R8UACIiEZFo9X9Q\nAIiIRESi1f9BASAiEhGJVv8HBYCISEQkWv0fFAAiIh2WiPV/UACIiHRYItb/QQEgItJhiVj/BwWA\niEiHJWL9HxQAIiIdkqj1f1AAiIh0SKLW/0EBICLSIYla/wcFgIhIhyRq/R8UACIi7ZbI9X9QAIiI\ntFsi1/8hjAAws5Fm9rqZ5ZvZRjO7I2gfaGZLzGxbcD0gaDcz+6WZFZjZOjOb0mRbs4P+28xsdvSm\nJSISfYlc/4fwjgDqgH919wnANOBWM5sIzAVec/cc4LXgZ4DLgZzgMgd4GEKBAXwfOAs4E/h+Y2iI\niCSiRK7/QxgB4O7F7r4muF0B5APDgVnA/KDbfODK4PYs4AkPWQH0N7NhwGXAEncvc/eDwBJgZkRn\nIyLSSRK9/g9tPAdgZtnA6cBKYIi7F0MoJIDBQbfhwO4mDysK2lpq//BzzDGzPDPLKy0tbcvwREQ6\nTaLX/6ENAWBmvYEXgG+4e3lrXZtp81ba/7HBfZ6757p7blZWVrjDExHpVIle/4cwA8DMMgjt/J9y\n9xeD5n1BaYfguiRoLwJGNnn4CGBPK+0iIglnRWEZudmJW/+H8FYBGfAokO/uDzS5axHQuJJnNvBS\nk/YbgtVA04DDQYnoVeBSMxsQnPy9NGgTEUko79f/Rydu+QcgPYw+M4AvAOvNbG3Q9h3gZ8BzZnYT\nsAu4JrhvMXAFUABUAl8EcPcyM/sxsCro9yN3L4vILEREOtH79f+xSR4A7r6M5uv3ABc309+BW1vY\n1mPAY20ZoIhIvFlReIDuGV04bUTi1v9B7wQWEWmzRF//30gBICLSBslS/wcFgIhImyRL/R8UACIi\nbZIs9X9QAIiItEmy1P9BASAiErZkqv+DAkBEJGzJVP8HBYCISNiSqf4PCgARkbAlU/0fFAAiImFJ\ntvo/KABERMKSbPV/UACIiIQl2er/oAAQEQlLstX/QQEgInJcyVj/BwWAiMhxJWP9HxQAIiLHlYz1\nf1AAiIgcVzLW/0EBICLSqmSt/4MCQESkVcla/wcFgIhIq5K1/g8KABGRViVr/R8UACIiLUrm+j8o\nAEREWpTM9X9QAIiItCiZ6/+gABARaVEy1/9BASAi0qz3DlUldf0fwggAM3vMzErMbEOTth+Y2Xtm\ntja4XNHkvrvNrMDMtpjZZU3aZwZtBWY2N/JTERGJjKPVdcx5Io+eGWl8ctIJsR5O1IRzBPA4MLOZ\n9gfdfXJwWQxgZhOB64BTgsf82szSzCwN+BVwOTAR+FzQV0QkrtQ3OHcsWEt+cTn/9c+nMzqzV6yH\nFDXHDQB3fwMoC3N7s4AF7l7t7tuBAuDM4FLg7oXuXgMsCPqKiMSVny7O5y/5+/jeJydy0clDYj2c\nqOrIOYDbzGxdUCIaELQNB3Y36VMUtLXULiISN363Yie/Xbad2dNHceOM0bEeTtS1NwAeBsYCk4Fi\n4P6g3Zrp6620f4SZzTGzPDPLKy0tbefwRETa5o2tpXx/0UYuHJ/Fdz+ZGhXqdgWAu+9z93p3bwAe\nIVTigdAr+5FNuo4A9rTS3ty257l7rrvnZmVltWd4IiJtsmVvBbc+tYacwb35r3+eQnpaaiyQbNcs\nzWxYkx+vAhpXCC0CrjOzbmY2GsgB3gJWATlmNtrMuhI6Ubyo/cMWEYmM0opqvvT4Krp3TePRG6fS\nu1t6rIfUaY47UzN7BrgAyDSzIuD7wAVmNplQGWcHcAuAu280s+eATUAdcKu71wfbuQ14FUgDHnP3\njRGfjYhIGxyrrefmJ/I4cLSa526ZzvD+PWI9pE5l7s2W4uNCbm6u5+XlxXoYIpKEGhqc2595m8Ub\ninn4+jOYeerQWA8pYsxstbvnHq9fahS6REQ+5P4lW/jj+mLmzjw5qXb+baEAEJGU83zebn71+rtc\nN3Ukc84bE+vhxIwCQERSyvJ3D/Cd369nxrhB/PjKUzFrbpV6alAAiEjKKCw9wld+t5oTB/bk19ef\nQUaKLPdsSWrPXkRSRtnRGr70+CrSuhj/c+OZ9OuREeshxVzqLHgVkZRVXVfPV55czZ7Dx3jm5rM4\ncVDPWA8pLugIQESSmrtz9wvreWtHGT+/+jTOGDUw1kOKGwoAEUlqD/21gBfffo9/ueQkZk3WZ1A2\npQAQkaT10tr3uH/JVq46fTi3XzQu1sOJOwoAEUlKq3eWcefCdUzNHsDPPvOxlF7u2RIFgIgknV0H\nKpnzxGqG9evOb76Qm7Rf6t5RCgARSSqHq2r54uNvUdfgPHbjVAb26hrrIcUtBYCIJI3a+ga+9tRq\ndh6o5L8/fwZjs3rHekhxTe8DEJGk4O589w8beLPgAPddfRrTxw6K9ZDino4ARCQpPPK3Qhas2s3X\nLhjLtbkjj/8AUQCISOJ7ZcNefvqnzXziY8P41qXjYz2chKEAEJGEtq7oEN949m0mjejP/ddOoksX\nLfcMlwJARBLWnkNV3DQ/j0G9uvHIDbl0z9Byz7ZQAIhIQjpSXceXHl/FsZp6HrtxKll9usV6SAlH\nq4BEJOHU1Tdw+9Nr2FZyhMdunMr4oX1iPaSEpCMAEUk4//7HfF7fUsoPPnUK55+UFevhJCwFgIgk\nlMff3M7jf9/BTeeM5gvTRsV6OAlNASAiCeP1zSX86OVNfHzCEL5zxYRYDyfhKQBEJCFs2lPObU+v\nYcKwvvziusmkablnhykARCTulZQf46b5q+jdPZ1HZ0+lVzetX4kE/RZFJK5V1tRx0/w8DlXW8vxX\npjO0X/dYDylpHPcIwMweM7MSM9vQpG2gmS0xs23B9YCg3czsl2ZWYGbrzGxKk8fMDvpvM7PZ0ZmO\niCSLhgZn8fpiZj30Jhv2HOaXnzudU4f3i/Wwkko4JaDHgZkfapsLvObuOcBrwc8AlwM5wWUO8DCE\nAgP4PnAWcCbw/cbQEBFpyt3588a9fOK/lvG1p9bQ4M68L+RyycQhsR5a0jluCcjd3zCz7A81zwIu\nCG7PB5YC3w7an3B3B1aYWX8zGxb0XeLuZQBmtoRQqDzT4RmISFJwd5ZuLeXBJVtZV3SY7EE9efCz\nk/jUpOE64Rsl7T0HMMTdiwHcvdjMBgftw4HdTfoVBW0ttYtIinN33iw4wANLtrBm1yFGDOjBfZ85\njU9PGU56mtapRFOkTwI3F9PeSvtHN2A2h1D5iBNPPDFyIxORuLOy8AD3L9nKW9vLGNavO/dcdSrX\nnDGSruna8XeG9gbAPjMbFrz6HwaUBO1FQNNvYhgB7AnaL/hQ+9LmNuzu84B5ALm5uc2GhIgkttU7\nD/Lgkq0sK9jP4D7d+OGnTuGzU0fq0zw7WXsDYBEwG/hZcP1Sk/bbzGwBoRO+h4OQeBX4SZMTv5cC\nd7d/2CKSiNYVHeKBJVtZuqWUQb268m+fmMDnp43Sjj9GjhsAZvYMoVfvmWZWRGg1z8+A58zsJmAX\ncE3QfTFwBVAAVAJfBHD3MjP7MbAq6PejxhPCIpL8Nu0p58G/bGXJpn3075nBt2eezA3TR+kNXTFm\noQU78Sk3N9fz8vJiPQwRaaet+yr4z79sZfH6vfTpns7N547hizOy6dM9I9ZDS2pmttrdc4/XT/Er\nIhFXWHqEX7y2jUXv7KFX13S+ftE4bjp3DP16aMcfTxQAIhIxuw5U8ovXtvH7t4volp7GLeeN5Zbz\nxjCgV9dYD02aoQAQkQ5771AVD/11G8/nFZHWxfjSjNF85YKxZPbW1zTGMwWAiLTb3sPH+NXrBSxY\ntQvDuP6sE/naheMY0lcf2JYIFAAi0mYlFcf476WF/G7lThoanGunjuS2C8dxQv8esR6atIECQDrs\nWG091bUN9OupE3zJruxoDb/5v3eZv3wHtfXOZ6YM5/aLchg5sGeshybtoACQDlmz6yC3PLma/Ueq\n+djwfswYl8m54zKZMmqA3tyTBBoanKKDVWwqPkzejoM889YuKmvruXLycL5+cQ6jM3vFeojSAQoA\nabcXVhdx94vrGdKvG7dfOI7lhQd45I1CHl76Lt0zujA1eyDn5mQyY1wmE4b2pYs+0TGuHautZ+u+\nCvKLy9m0p5xNxeXkF1dwpLoOgLQuxsxTh/LNj+cwbnCfGI9WIkEBIG1W3+Dc+8pm5r1RyPQxg/j1\n9VPeX+Z3pLqOlYUH+Nu2/Swr2M9PFm8GYFCvrswYl8k54zI5JydTteIYO3CkOtjBf7Czf7f0KPUN\noTeG9uqaxoRhffn0lOFMHNaXiSf05aQhfXRUl2T0TmBpk8NVtXz9mbf5v62l3DB9FN/95EQyWvnI\n3r2Hj7GsYD/LtpWyrOAA+49UAzAmq1coDMZlMm3sIPrqnaFR0dDg7DhwlE3Bjj6/OLSz31de/X6f\nE/p1Z0Kwk2/c2Y8c0FNHbAks3HcCKwAi7Gh1HVW19Um5/rmw9AhffiKPXQcq+eGsU7j+rFFtery7\ns2VfBcuCo4OVhWVU1daT1sWYNKIf5+RkcW5OJpNH9m81VKR5VTX1bN5b/v7OflNxOVv2VlBZUw9A\nehdj3ODeH+zoh/VlwrC+epNWElIAxMDrm0uY++I6yqvquPOy8cw+Oztpvsno/7aWctvTa8hI68LD\n10/hrDGDOrzN6rp61uw8xJsF+/lbwX7WFx2iwUPlh2ljBnFOTugIYdzg3pglx+8xEtyd0opQCafp\nzn77/qM0/nfu0z39/VfzjTv6nCG96ZauEk4qUAB0osNVtfz7y5t4fnURJw3pzQn9e7B0SylnjBrA\nvZ85jXGDe8d6iO3m7jy6bDs/WZzPSUP68MgNuVFb8ne4spblhfvfP3+w80AlAEP6dgutLgpOKA/u\nk7xvMmpocA5W1lBSUc2+8mOUlFdTUnGMfeXBzxXVFB2sZP+RmvcfM3JgDyYM/ccSzvD+PRSaKUwB\n0EmWbilh7gvrKak4xlcvGMvXL86ha1oX/rD2PX6waBNVtfV88+MncfO5oxPu6+2q6+r5t99v4PnV\nRVx2yhAeuHZyp3587+6yyuD8wX7efHc/hyprATh5aJ/QCeWcTM7MHpgQHyns7hysrH1/Jx7auX9w\ne195NaUVoZ19bf1H/0/275nBkD7dGdy3G8Maa/bD+nLysL76gDX5CAVAlJUfq+Wel/N5Nm83OYN7\n8x/XTGLSyP7/0Kek4hjf+8NGXtm4l9NG9OPnV09i/NDEWD5XUnGMrzy5mjW7DvH1i3P4xsU5MT0p\n2NDgbNxTzt8KSnmzYD+rdhykpq4BgK5pXeie0YWeXdPp0TWN7hlp9MjoQo+uafTISKNH1/TQzxlp\ndG9sy0ijZ2PfJm3du4bam/7cIyOtxXMS7s6hylr2Ba/SS/5hB1/NvorQdWlFNTX1DR95fL8eGQzp\n240hfbuT1Sd0PaRPNwb37c6Qvt0Y3CfUrtU30hYKgCh6Y2sp335hHfvKj3HL+WO54+KcFv+DujuL\n1+/ley9toPxYLbdflMNXLxgb1yc5N7x3mJufyONgZQ33XzOZT5w2LNZD+oiqmnrydpaxdtchjtbU\nc6y2nqqaeqpq66ls/Dloe/92cF9jcLRFehdrEihpdE9P40h1Xas79sHBDn1wsCNv3NE3tmvHLtGi\nAIiCimO1/GRxPs+8tZuxWb34j2smcfqJA47/QELrrn/4v5tY9M4eJgzry8+vPo1Th/eL8ojb7n/f\n2cOdC99hYM+uzLshNy7H2FH1Df6RgKis+SAkjjW5XVXzQbBU1X4QNJU19fTuls7gJjv0xlfsg/tq\nxy6xpQCIsL9tK2XuC+spPlzFzeeN4ZsfP6ld/8n/vHEv//aHDRw4WsNXzx/L7RePi4uVGQ0NzgNL\ntvLQ6wXkjhrAw58/g6w+ybeUVSQV6BvBIuRIdR0/WZzP0yt3MSarFwu/ejZTwnzV35xLTxnKWaMH\n8eM/buKh1wt4deNe7rv6tLCPJKLhSHUd33x2LUs27eOzuSP50ZWnxEUoiUh06QigFW8W7OeuhevY\nc7iKm88dw79c0r5X/S1ZuqWEu19cz77yY3w5CtsPx+6ySr48P4+C0iN89xMTmH12tpYPiiQ4lYA6\n4Gh1HT/9Uz6/W7GLMZm9+Pk1p3HGqIFRea6KY7X89E+beXrlLkZn9uLez5zGmaOj81wftvzdA3zt\nqdU0OPzqn6dwTk5mpzyviESXAqCd/v5u6FX/e4equGnGaL512fhOeVX+94L9fPvFdRQdrGL29Gzu\nvGx8VNe3P7liJz9ctJHszF48ckOuPtZXJInoHEAbHa2u495XNvPE8p1kD+rJc7dMZ2p257wSBzh7\nXCav3HEeP391C/OX7+C1zfu499Oncfa4yL4qr61v4AeLNvLUyl1cOD6LX3zudH0Qm0iK0hEAsKLw\nAHcufIeig1V88ezR3HnZeHp0jd1J0FU7yrhr4Tq27z/K5848kbuvODkiO+myozV89XerWbm9jFvO\nH8Ndl52cNJ9VJCIf0BFAGCpr6rjvlS08/vcdjBrUk2fnTO+0+ntrpmYP5E93nMuDS7byyN8KWbql\nhJ98+mNcOH5wu7e5eW85X56fR0lFNQ9+dhJXnT4igiMWkUSUskcAb20v486F77DzQCU3np3NXTPH\n07Nr/OXh2t2HuPP5d9hWcoTPTBnB9z45sc3fvfvqxr1889m19O6Wzrwbcpn8oY+sEJHkoiOAFlTV\n1HPfq5t5/O87GDmgJwvmTGNaBD7aOFomj+zPy18/h4f+WsCvl77LG9tKuefKU7n0lKHHfay789Bf\nC7h/yVYmjejHvBtyGdI3eT9JU0TapkNHAGa2A6gA6oE6d881s4HAs0A2sAO41t0PWmhx+S+AK4BK\n4EZ3X9Pa9iN9BLBqRxl3Pv8OOw5UMnv6KL59+clx+aq/JRveO8ydC9eRX1zOP006gR/800QGtfDF\nM1U19Xxr4Tv8cV0xV50+nJ9++mP6eAKRFBHuEUAkPpHsQnef3OTJ5gKvuXsO8FrwM8DlQE5wmQM8\nHIHnDktVTT0/fnkT1/5mOXUNztM3n8UPZ52aUDt/gFOH92PRbTP410tO4pUNxVz64Bu8vG4PHw7x\nPYequPq//87i9cXcffnJPHDtJO38ReQjorEHnAVcENyeDywFvh20P+GhvdUKM+tvZsPcvTgKY3jf\n6p1lfOv50IqaL0wbxdzLT06Iz49vSUZaF26/OIdLTxnKXQvf4ban3+Z/T9nDj688lcF9urN6Zxm3\nPLmG6tp6Hps9lQtPbv+JYxFJbh3dEzrwZzNz4DfuPg8Y0rhTd/diM2vcAw0Hdjd5bFHQFpUAOFZb\nz/1/3sJvl23nhH49ePrLZ0V8TX0sjR/ahxe+eja/XbadB5Zs5ZIH3uDqM0bw5PKdnNC/OwvmnMW4\nwYnx3QMiEhsdDYAZ7r4n2MkvMbPNrfRtbsH5R05AmNkcQiUiTjzxxHYNandZJbP/5y0KS49y/Vkn\ncvcVE+idwK/6W5Ke1oWvnD+WSyYO4a6F63h02XbOzcnkoc9NafNKIRFJPR3aK7r7nuC6xMx+D5wJ\n7Gss7ZjZMKAk6F4EjGzy8BHAnma2OQ+YB6GTwO0Z1+C+3cge1IsfferUlPh8m7FZvXnulums3X2Q\nSSP6J9xXT4pIbLR7T2FmvcysT+Nt4FJgA7AImB10mw28FNxeBNxgIdOAw9Gq/3dLT+OxG6emxM6/\nUVoX44xRA7XzF5GwdeQIYAjw++Cjg9OBp939FTNbBTxnZjcBu4Brgv6LCS0BLSC0DPSLHXhuERHp\noHYHgLsXApOaaT8AXNxMuwO3tvf5REQkslQvEBFJUQoAEZEUpQAQEUlRCgARkRSlABARSVEKABGR\nFBXXXwhjZqXAzliPox0ygf2xHkQn05xTg+acGEa5e9bxOsV1ACQqM8sL57O4k4nmnBo05+SiEpCI\nSIpSAIiIpCgFQHTMi/UAYkBzTg2acxLROQARkRSlIwARkRSlAAiDmY00s9fNLN/MNprZHUH7QDNb\nYmbbgusBQbuZ2S/NrMDM1pnZlCbbui/YRn7Qp7lvSou5dsz5ZDNbbmbVZvatD21rppltCX4fc2Mx\nn3BEas4tbSceRfLfObg/zczeNrOXO3su4Yrw33Z/M1toZpuD7U2PxZzazd11Oc4FGAZMCW73AbYC\nE4H7gLlB+1zg3uD2FcCfCH0N5jRgZdB+NvAmkBZclgMXxHp+EZrzYGAqcA/wrSbbSQPeBcYAXYF3\ngImxnl+U59zsdmI9v2jOucn2/gV4Gng51nPrjDkD84EvB7e7Av1jPb+2XHQEEAZ3L3b3NcHtCiCf\n0BfazyL0B0BwfWVwexbwhIesAPoHX4/pQHdCfyjdgAxgX6dNpA3aOmd3L3H3VUDthzZ1JlDg7oXu\nXgMsCLYRdyI151a2E3ci+O+MmY0APgH8thOG3m6RmrOZ9QXOAx4N+tW4+6FOmUSEKADayMyygdOB\nlcAQD77WMrgeHHQbDuxu8rAiYLi7LwdeB4qDy6vunt85I2+/MOfckmZ/F5EfZWR1cM4tbSeuRWDO\n/wncBTREaYgR18E5jwFKgf8Jyl6/Db4eN2EoANrAzHoDLwDfcPfy1ro20+ZmNg6YAIwgtBO8yMzO\ni/xII6cNc25xE820xfXSswjynVGeAAABo0lEQVTMOaLb6QwdHauZfRIocffVER9clETg3ycdmAI8\n7O6nA0cJlY4ShgIgTGaWQeiP5Sl3fzFo3heUdgiuS4L2ImBkk4ePAPYAVwEr3P2Iux8hdJ5gWmeM\nvz3aOOeWtPS7iEsRmnNL24lLEZrzDOBTZraDUJnvIjP7XZSG3GER/NsucvfGo7uFhAIhYSgAwhCs\n1HkUyHf3B5rctQiYHdyeDbzUpP2GYDXQNOBwcEi5CzjfzNKDP8DzCdUf40475tySVUCOmY02s67A\ndcE24k6k5tzKduJOpObs7ne7+wh3zyb0b/xXd/98FIbcYRGc815gt5mND5ouBjZFeLjRFeuz0Ilw\nAc4hVLZYB6wNLlcAg4DXgG3B9cCgvwG/IrT6ZT2QG7SnAb8htNPfBDwQ67lFcM5DCb0iKgcOBbf7\nBvddQWilxbvA/4v13KI955a2E+v5Rfvfuck2LyC+VwFF8m97MpAXbOsPwIBYz68tF70TWEQkRakE\nJCKSohQAIiIpSgEgIpKiFAAiIilKASAikqIUACIiKUoBICKSohQAIiIp6v8DAmawT4keHxEAAAAA\nSUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x19253b519e8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# CVPR按照最后一篇论文发表时间的研究者分布\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "cvpr = author_list.get_by_conference('CVPR')\n",
    "author_count = []\n",
    "year = range(2007, 2018)\n",
    "for i in year:\n",
    "    author_count.append(len(cvpr[i]))\n",
    "plt.plot(year, author_count, label='Count')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Active: 6698\n",
      "Inactive: 4758\n"
     ]
    }
   ],
   "source": [
    "# 按照定义，不活跃的研究者和活跃的研究者的占比\n",
    "\n",
    "inactive = 0\n",
    "for i in range(2007, 2014):\n",
    "    inactive += len(cvpr[i])\n",
    "active = 0\n",
    "for i in range(2014, 2018):\n",
    "    active += len(cvpr[i])\n",
    "print(\"Active: %s\" % active)\n",
    "print(\"Inactive: %s\" % inactive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 用FPGrowth算法计算频繁项集\n",
    "\n",
    "from fp_growth import FPGrowth\n",
    "fp_growth = FPGrowth()\n",
    "fp_growth.fit(dataset.get_author_group_list(), min_support=3, min_size=3)\n",
    "fp_growth.print()\n",
    "\n",
    "# 根据Group的定义，将频繁项集组合成Group\n",
    "\n",
    "from dblp import DBLPAuthorPaper, DBLPGroup\n",
    "\n",
    "author_group = DBLPAuthorPaper(dataset)\n",
    "group = DBLPGroup(fp_growth.items, author_group)\n",
    "print(len(group.groups))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# LDA模型训练，主题数为30\n",
    "\n",
    "from dblp import DBLP\n",
    "from lda import LdaModel\n",
    "\n",
    "TOPIC_NUM = 30\n",
    "\n",
    "dataset = DBLP('./dataset/FilteredDBLP.txt')\n",
    "topic_model = LdaModel()\n",
    "topic_model.fit(dataset.get_title_list(), num_topics=TOPIC_NUM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0,\n",
       "  '0.092*\"plan\" + 0.072*\"constraint\" + 0.059*\"game\" + 0.056*\"agent\" + 0.053*\"multi\" + 0.031*\"class\" + 0.028*\"new\" + 0.025*\"strategi\" + 0.023*\"gener\" + 0.020*\"complex\"'),\n",
       " (1,\n",
       "  '0.078*\"adapt\" + 0.068*\"label\" + 0.060*\"domain\" + 0.055*\"recommend\" + 0.039*\"filter\" + 0.038*\"high\" + 0.037*\"context\" + 0.037*\"multi\" + 0.031*\"awar\" + 0.030*\"dimension\"'),\n",
       " (2,\n",
       "  '0.093*\"learn\" + 0.065*\"supervis\" + 0.056*\"scene\" + 0.054*\"view\" + 0.048*\"discrimin\" + 0.043*\"languag\" + 0.042*\"reconstruct\" + 0.041*\"machin\" + 0.039*\"semi\" + 0.029*\"stereo\"'),\n",
       " (3,\n",
       "  '0.072*\"person\" + 0.069*\"camera\" + 0.068*\"toward\" + 0.031*\"attent\" + 0.028*\"model\" + 0.027*\"fusion\" + 0.026*\"base\" + 0.024*\"part\" + 0.024*\"feedback\" + 0.024*\"qualiti\"'),\n",
       " (4,\n",
       "  '0.156*\"featur\" + 0.078*\"select\" + 0.050*\"embed\" + 0.042*\"base\" + 0.037*\"similar\" + 0.034*\"sequenc\" + 0.031*\"argument\" + 0.029*\"boost\" + 0.027*\"stream\" + 0.024*\"rate\"'),\n",
       " (5,\n",
       "  '0.228*\"network\" + 0.091*\"neural\" + 0.049*\"social\" + 0.046*\"convolut\" + 0.039*\"set\" + 0.028*\"partial\" + 0.027*\"group\" + 0.020*\"use\" + 0.018*\"recurr\" + 0.018*\"observ\"'),\n",
       " (6,\n",
       "  '0.094*\"problem\" + 0.057*\"sampl\" + 0.044*\"mine\" + 0.042*\"solv\" + 0.041*\"pattern\" + 0.027*\"direct\" + 0.025*\"deform\" + 0.024*\"search\" + 0.024*\"algorithm\" + 0.024*\"goal\"'),\n",
       " (7,\n",
       "  '0.097*\"scale\" + 0.097*\"larg\" + 0.074*\"linear\" + 0.058*\"evalu\" + 0.040*\"integr\" + 0.036*\"scalabl\" + 0.031*\"non\" + 0.025*\"test\" + 0.023*\"polici\" + 0.020*\"divers\"'),\n",
       " (8,\n",
       "  '0.184*\"detect\" + 0.128*\"object\" + 0.080*\"time\" + 0.049*\"real\" + 0.034*\"statist\" + 0.024*\"use\" + 0.020*\"re\" + 0.020*\"ground\" + 0.019*\"adversari\" + 0.016*\"world\"'),\n",
       " (9,\n",
       "  '0.051*\"extract\" + 0.049*\"event\" + 0.045*\"word\" + 0.039*\"model\" + 0.037*\"exploit\" + 0.036*\"discoveri\" + 0.027*\"correspond\" + 0.025*\"relev\" + 0.024*\"appear\" + 0.024*\"beyond\"'),\n",
       " (10,\n",
       "  '0.099*\"kernel\" + 0.068*\"tree\" + 0.054*\"resolut\" + 0.050*\"effect\" + 0.032*\"support\" + 0.029*\"autom\" + 0.022*\"vector\" + 0.021*\"use\" + 0.020*\"carlo\" + 0.020*\"mont\"'),\n",
       " (11,\n",
       "  '0.071*\"queri\" + 0.061*\"distribut\" + 0.046*\"map\" + 0.043*\"prefer\" + 0.039*\"rule\" + 0.038*\"data\" + 0.033*\"constrain\" + 0.030*\"driven\" + 0.029*\"analysi\" + 0.027*\"complet\"'),\n",
       " (12,\n",
       "  '0.278*\"learn\" + 0.073*\"deep\" + 0.041*\"represent\" + 0.040*\"activ\" + 0.029*\"random\" + 0.027*\"process\" + 0.027*\"task\" + 0.024*\"multi\" + 0.023*\"transfer\" + 0.022*\"model\"'),\n",
       " (13,\n",
       "  '0.089*\"logic\" + 0.080*\"knowledg\" + 0.063*\"tempor\" + 0.056*\"reason\" + 0.039*\"base\" + 0.039*\"descript\" + 0.034*\"order\" + 0.032*\"ontolog\" + 0.024*\"s\" + 0.023*\"program\"'),\n",
       " (14,\n",
       "  '0.040*\"preserv\" + 0.037*\"correl\" + 0.033*\"heurist\" + 0.028*\"manag\" + 0.028*\"count\" + 0.021*\"symmetri\" + 0.021*\"simpl\" + 0.020*\"search\" + 0.019*\"privaci\" + 0.018*\"cooper\"'),\n",
       " (15,\n",
       "  '0.089*\"cluster\" + 0.049*\"function\" + 0.040*\"align\" + 0.038*\"minim\" + 0.036*\"latent\" + 0.035*\"express\" + 0.032*\"consist\" + 0.030*\"decomposit\" + 0.027*\"model\" + 0.026*\"aggreg\"'),\n",
       " (16,\n",
       "  '0.119*\"inform\" + 0.052*\"text\" + 0.052*\"web\" + 0.039*\"level\" + 0.031*\"document\" + 0.030*\"variat\" + 0.026*\"automat\" + 0.026*\"theoret\" + 0.024*\"use\" + 0.022*\"entiti\"'),\n",
       " (17,\n",
       "  '0.135*\"recognit\" + 0.082*\"3d\" + 0.068*\"action\" + 0.061*\"face\" + 0.054*\"improv\" + 0.040*\"use\" + 0.038*\"space\" + 0.025*\"train\" + 0.025*\"prior\" + 0.022*\"model\"'),\n",
       " (18,\n",
       "  '0.067*\"dynam\" + 0.058*\"human\" + 0.052*\"interact\" + 0.045*\"decis\" + 0.038*\"robot\" + 0.034*\"system\" + 0.028*\"behavior\" + 0.027*\"user\" + 0.026*\"model\" + 0.023*\"predict\"'),\n",
       " (19,\n",
       "  '0.098*\"robust\" + 0.070*\"shape\" + 0.068*\"motion\" + 0.048*\"probabilist\" + 0.029*\"extend\" + 0.021*\"sens\" + 0.020*\"compress\" + 0.019*\"fine\" + 0.018*\"estim\" + 0.018*\"sat\"'),\n",
       " (20,\n",
       "  '0.061*\"estim\" + 0.060*\"infer\" + 0.056*\"model\" + 0.042*\"hierarch\" + 0.038*\"pose\" + 0.038*\"relat\" + 0.036*\"stochast\" + 0.035*\"point\" + 0.030*\"belief\" + 0.028*\"joint\"'),\n",
       " (21,\n",
       "  '0.241*\"imag\" + 0.058*\"spars\" + 0.039*\"regress\" + 0.039*\"classif\" + 0.028*\"spatial\" + 0.027*\"use\" + 0.027*\"code\" + 0.023*\"bayesian\" + 0.022*\"self\" + 0.021*\"local\"'),\n",
       " (22,\n",
       "  '0.085*\"match\" + 0.077*\"comput\" + 0.050*\"global\" + 0.047*\"answer\" + 0.037*\"combin\" + 0.036*\"identif\" + 0.033*\"commun\" + 0.033*\"environ\" + 0.030*\"vision\" + 0.024*\"transform\"'),\n",
       " (23,\n",
       "  '0.145*\"segment\" + 0.104*\"fast\" + 0.037*\"continu\" + 0.030*\"sequenti\" + 0.030*\"crowd\" + 0.029*\"vote\" + 0.027*\"specif\" + 0.026*\"ensembl\" + 0.021*\"manipul\" + 0.020*\"peopl\"'),\n",
       " (24,\n",
       "  '0.144*\"video\" + 0.103*\"onlin\" + 0.071*\"retriev\" + 0.040*\"hash\" + 0.039*\"depth\" + 0.033*\"cross\" + 0.029*\"modal\" + 0.024*\"binari\" + 0.017*\"dataset\" + 0.015*\"analysi\"'),\n",
       " (25,\n",
       "  '0.071*\"factor\" + 0.070*\"matrix\" + 0.064*\"singl\" + 0.050*\"weight\" + 0.030*\"time\" + 0.029*\"one\" + 0.027*\"norm\" + 0.021*\"seri\" + 0.021*\"2d\" + 0.019*\"revis\"'),\n",
       " (26,\n",
       "  '0.107*\"graph\" + 0.083*\"rank\" + 0.046*\"regular\" + 0.038*\"low\" + 0.033*\"bound\" + 0.032*\"project\" + 0.030*\"tensor\" + 0.025*\"studi\" + 0.023*\"bandit\" + 0.023*\"base\"'),\n",
       " (27,\n",
       "  '0.070*\"framework\" + 0.069*\"abstract\" + 0.053*\"program\" + 0.037*\"valu\" + 0.035*\"control\" + 0.032*\"iter\" + 0.029*\"invari\" + 0.029*\"attribut\" + 0.023*\"base\" + 0.023*\"unifi\"'),\n",
       " (28,\n",
       "  '0.051*\"metric\" + 0.047*\"design\" + 0.047*\"condit\" + 0.041*\"distanc\" + 0.037*\"convex\" + 0.036*\"mechan\" + 0.028*\"line\" + 0.026*\"composit\" + 0.023*\"two\" + 0.023*\"perspect\"'),\n",
       " (29,\n",
       "  '0.095*\"visual\" + 0.090*\"track\" + 0.072*\"multipl\" + 0.034*\"object\" + 0.033*\"model\" + 0.029*\"instanc\" + 0.029*\"explor\" + 0.025*\"use\" + 0.023*\"region\" + 0.023*\"topic\"')]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 所有的主题\n",
    "\n",
    "topic_model.model.print_topics(TOPIC_NUM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 尝试预测一个标题\n",
    "\n",
    "result = topic_model.predict('Understanding Behaviors that Lead to Purchasing: A Case Study of Pinterest.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(2, 0.12916666), (9, 0.12916668), (14, 0.12916666), (18, 0.12916666), (26, 0.37916666)]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['0.093*\"learn\" + 0.065*\"supervis\" + 0.056*\"scene\" + 0.054*\"view\" + 0.048*\"discrimin\" + 0.043*\"languag\" + 0.042*\"reconstruct\" + 0.041*\"machin\" + 0.039*\"semi\" + 0.029*\"stereo\"',\n",
       " '0.051*\"extract\" + 0.049*\"event\" + 0.045*\"word\" + 0.039*\"model\" + 0.037*\"exploit\" + 0.036*\"discoveri\" + 0.027*\"correspond\" + 0.025*\"relev\" + 0.024*\"appear\" + 0.024*\"beyond\"',\n",
       " '0.040*\"preserv\" + 0.037*\"correl\" + 0.033*\"heurist\" + 0.028*\"manag\" + 0.028*\"count\" + 0.021*\"symmetri\" + 0.021*\"simpl\" + 0.020*\"search\" + 0.019*\"privaci\" + 0.018*\"cooper\"',\n",
       " '0.067*\"dynam\" + 0.058*\"human\" + 0.052*\"interact\" + 0.045*\"decis\" + 0.038*\"robot\" + 0.034*\"system\" + 0.028*\"behavior\" + 0.027*\"user\" + 0.026*\"model\" + 0.023*\"predict\"',\n",
       " '0.107*\"graph\" + 0.083*\"rank\" + 0.046*\"regular\" + 0.038*\"low\" + 0.033*\"bound\" + 0.032*\"project\" + 0.030*\"tensor\" + 0.025*\"studi\" + 0.023*\"bandit\" + 0.023*\"base\"']"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(result.prob_list)\n",
    "result.topic_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from dblp import DBLPAuthorPaper\n",
    "from fp_growth import FPGrowth\n",
    "\n",
    "fp_growth = FPGrowth()\n",
    "fp_growth.fit(dataset.get_author_group_list(), min_support=3, min_size=3)\n",
    "\n",
    "author_group = DBLPAuthorPaper(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 统计团队发表的论文和论文涉及的主题\n",
    "\n",
    "group = DBLPGroup(fp_growth.items, author_group, topic_model)\n",
    "with open('result_1.txt', 'w', encoding='utf-8') as f:\n",
    "    for i in group.get_group_member():\n",
    "        f.write(str(i) + '\\n')\n",
    "with open('result_2.txt', 'w', encoding='utf-8') as f:\n",
    "    for i in group.groups:\n",
    "        f.write(str(i) + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 按照2007-2012以及2013-2017两个时间段，分析两个时间段团队的相似度\n",
    "\n",
    "fp_growth = FPGrowth()\n",
    "fp_growth.fit(dataset.get_author_group_list(start_year=2007, end_year=2012), min_support=3, min_size=3)\n",
    "author_group = DBLPAuthorPaper(dataset, start_year=2007, end_year=2012)\n",
    "group = DBLPGroup(fp_growth.items, author_group, topic_model)\n",
    "\n",
    "fp_growth_1 = FPGrowth()\n",
    "fp_growth_1.fit(dataset.get_author_group_list(start_year=2013, end_year=2017), min_support=3, min_size=3)\n",
    "author_group_1 = DBLPAuthorPaper(dataset, start_year=2013, end_year=2017)\n",
    "group_1 = DBLPGroup(fp_growth_1.items, author_group_1, topic_model)\n",
    "\n",
    "for g in group.groups:\n",
    "    g_1, similarity = g.find_similar_group(group_1)\n",
    "    if g_1 is not None:\n",
    "        print(g)\n",
    "        print(g_1)\n",
    "        print('Similarity: ' + str(similarity))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
